# -----------------------------------------------------------------------------
#
#    Frozen Nebula - First light of telescope for Lovebyte 2024
#    Copyright (C) 2024  Matthias Koch
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

# Assembler options: ".option norelax" switches linker relaxation off,
# ".option rvc" permits compressed (16-bit) instruction encodings.
.option norelax
.option rvc

# -----------------------------------------------------------------------------
#  Peripheral IO registers
#
#  The *_BASE symbols are absolute addresses. All other symbols in a group are
#  byte offsets that are added to a base register when the register is accessed.
# -----------------------------------------------------------------------------

  # Clock unit. Reset keeps RCU_BASE in x14 and addresses these relative to it.
  .equ RCU_BASE,     0x40021000
  .equ RCU_CTL,           0x000   # Control register, Reset sets PLLEN here
  .equ RCU_CFG0,          0x004   # Clock configuration, Reset writes the PLL setup here
  .equ RCU_APB1EN,        0x01C   # Peripheral clock enables, Reset switches the DAC on here

  # DAC. Note that these offsets already contain 0x400: the code keeps
  # DAC_BASE + 0x400 in x8 and therefore uses "offset - 0x400" as displacement.
  .equ DAC_BASE,     0x40007000
  .equ DAC_CTL,           0x400   # Control register, Reset sets DEN0 and DEN1 here
  .equ DACC_R12DH,        0x420   # Data register written by pixel, carries both channel values

  # RAM range cleared by clearram: 0x8000 bytes, ram_end itself is not written.
  .equ ram_begin,    0x20000000
  .equ ram_end,      0x20008000

# -----------------------------------------------------------------------------
Reset:
# -----------------------------------------------------------------------------
#  Entry point. Configures the clock for 108 MHz, enables the DAC and then
#  falls through into clearram.
#
#  Registers on exit:
#    x8  = DAC_BASE + 0x400, kept constant for the rest of the program
#    x11 = 0x00010001, x14 = RCU_BASE, x15 = 0x100 (not needed any more)
# -----------------------------------------------------------------------------

  li x14, RCU_BASE           # 0x40021000 = RCU_BASE

  # PLL Initialisation
  #
  # Fields combined into the RCU_CFG0 value below:

  #  1 << 29  PLL factor high bit
  # 10 << 18  PLL factor: 8/2 MHz * 27 = 108 MHz = HCLK = PCLK2
  #  4 <<  8  PCLK1 = HCLK/2 = 54 MHz. Maximum is 54 MHz.
  #  3 << 14  ADCPRE = PCLK2/8 = 13.5 MHz. Maximum is 14 MHz.
  #  2 <<  0  PLL is the system clock

  li x15, 1 << 29 | 10 << 18 | 4 << 8 | 3 << 14 | 2  # Config for 108 MHz
  sw x15, RCU_CFG0(x14)

  # PLLEN is bit 24 of RCU_CTL. Shifted right by 16 it becomes 0x100, which is
  # written to the upper halfword only (address RCU_CTL+2).
  li x15, (1<<24) >> 16  # Set PLLEN to enable freshly configured PLL
  sh x15, RCU_CTL+2(x14) # Halfword access because low part of register needs to be kept on reset values

  # DAC Initialisation
  #
  # One constant is reused three times to save code space:
  #   - as it is, it serves as the clock enable mask (it has bit 29 set),
  #   - added to RCU_BASE it wraps around to DAC_BASE,
  #   - DAC_BASE shifted right by 14 has exactly bits 0 and 16 set.

  li x11, DAC_BASE-RCU_BASE  # 0xfffe6000
  sw x11, RCU_APB1EN(x14)    # Enable DAC (bit 29 = 0x20000000) and most of everything else
  add  x11, x11, x14         # 0x40007000 = DAC_BASE
  ori   x8, x11, 0x400       # 0x40007400 = DAC_BASE + 0x400
  srli x11, x11, 14          # 0x00010001
  sw x11, DAC_CTL-0x400(x8)  # Enable both DAC channels by setting DEN0 and DEN1

# -----------------------------------------------------------------------------

/*
testpattern:                 # For easy oscilloscope setup
  slli x11, x11, 2
  add x11, x11, x10

  srai x10, x10, 30
  add  x11, x11, x10

  srli x10, x10, 16
  add x10, x10, x11

  sw x10, DACC_R12DH-0x400(x8)
  j Reset
*/

# -----------------------------------------------------------------------------
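#  Notes on register usage:
#
#   x1: Return address, written by c.jal and used by ret
#
#   x6: Time counter
#
#   x8: Constant DAC_BASE + 0x400
#   x9: Random
#  x10: Current rule (also the memory pointer in clearram and update_loop)
#  x11: Current x position
#  x12: Current y position
#  x13: Scratch
#  x14: Scratch
#  x15: Scratch
#
#  x16: Scratch (end address in clearram, loop counter in update_loop)
#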
# -----------------------------------------------------------------------------

  # Rules use six words each and are stored in slots of rulesize bytes
  # (eight words), so the last two words of every slot stay unused.
  #
  # Word order inside a slot, for  x' = a*x + b*y + e  and  y' = c*x + d*y + f :
  #
  #   byte  0: e    byte  4: a    byte  8: b
  #   byte 12: f    byte 16: c    byte 20: d
  #
  # ifs_xy reads three consecutive words through the first three offsets below.
  # For the y equation the rule pointer is advanced by offset_f beforehand, so
  # the same offsets then address f, c and d.
  #
  # NOTE(review): the trailing "3 * 6" style remarks look like leftovers of an
  # earlier memory layout - confirm before relying on them.

  .equ offset_e_f, 0 * 4 # 3 * 6
  .equ offset_a_c, 1 * 4 # 4 * 6
  .equ offset_b_d, 2 * 4 # 5 * 6
  .equ offset_f  , 3 * 4

  # Distance in bytes from one rule slot to the next.
  .equ rulesize, 8 * 4

# -----------------------------------------------------------------------------

clearram:                 # Fill the RAM from ram_begin up to ram_end with zero words.
  li x16, ram_end         # x16 = first address behind the RAM, loop limit
  li x10, ram_begin       # x10 = write pointer
.Lclear_word:
  sw zero, 0(x10)
  addi x10, x10, 4
  bne x10, x16, .Lclear_word

  # All rule parameters are zero now. The first IFS step therefore moves the
  # current coordinates to 0, 0 and the animation starts as a single dot in
  # the middle.

initialise:
  li x9, 7                # Seed for the random number generator

# -----------------------------------------------------------------------------

mainloop:
  # x6 is not initialised anywhere in this file, so the moment of the first
  # rule change depends on its value at reset.
  addi x6, x6, 1          # One more tick on the time counter
  slli x15, x6, 17        # Shifts out everything but the low 15 bits of the counter.
  bnez x15, time_flows    # Only when these are all zero a rule change is due:

    # Pick one parameter word and give it a new random target value.
    # Targets live 1024 bytes above the parameters currently in use.

    c.jal xorshift        # Random number selecting the parameter
    li x15, ram_begin
    andi x13, x9, 0xFC    # Word aligned byte offset 0..252, covers 8 rule slots
    add x13, x13, x15     # x13 = address of the current parameter

    c.jal xorshift        # Random number for the new value
    srai x15, x9, 16      # Upper half, sign extended: a signed 16 bit value
    sw x15, 1024(x13)     # Store as target

# -----------------------------------------------------------------------------

time_flows:               # Let the parameters in use creep towards their targets
  slli x15, x6, 25        # Shifts out everything but the low 7 bits of the counter.
  bnez x15, choose_rule   # Only when these are all zero a step is taken.

  # NOTE(review): 256 words are visited here, but the rule change in mainloop
  # only writes targets for the first 64 words. The remaining iterations
  # compare zero with zero. Possibly kept for timing - confirm before changing.

  li x10, ram_begin       # x10 = pointer to the parameter in use
  li x16, 256             # x16 = number of words left

update_loop:
  lw x13,    0(x10)       # Value in use
  lw x14, 1024(x10)       # Target value
  sub x14, x14, x13       # x14 = target - value

  # Reduce the difference to its sign: -1, 0 or 1
  srai x15, x14, 31       # -1 if the difference is negative, else 0
  neg x14, x14
  srli x14, x14, 31       #  1 if the negated difference is negative, else 0
  or x15, x15, x14

  add x13, x13, x15       # Move the value one step in that direction
  sw x13, 0(x10)

  addi x16, x16, -1       # Next word, until the counter runs out
  addi x10, x10, 4
  bnez x16, update_loop

# -----------------------------------------------------------------------------

choose_rule:
  # An IFS fractal only looks nice if its rules are applied with different
  # probabilities. Five of the eight available rule slots are used here.
  #
  # The rule pointer advances by one slot for every mask below that finds
  # only zero bits. With evenly distributed random bits this selects
  #
  #   rule 0: 1/2   rule 1: 1/4   rule 2: 1/8   rule 3: 1/16   rule 4: 1/16

  c.jal xorshift
  li x10, ram_begin       # x10 = rule 0

  andi x15, x9, 0xF       # Low four bits of the random number
  bnez x15, .Llow4_checked
    addi x10, x10, rulesize
.Llow4_checked:

  andi x15, x15, 0x7      # Low three bits
  bnez x15, .Llow3_checked
    addi x10, x10, rulesize
.Llow3_checked:

  andi x15, x15, 0x3      # Low two bits
  bnez x15, .Llow2_checked
    addi x10, x10, rulesize
.Llow2_checked:

  andi x15, x15, 0x1      # Lowest bit
  bnez x15, .Llow1_checked
    addi x10, x10, rulesize
.Llow1_checked:

# -----------------------------------------------------------------------------

ifs: # Apply the rule x10 points to. Both equations see the old coordinates.
  c.jal ifs_xy            # x13 = new x, from the first three words of the rule
  addi x10, x10, offset_f # Continue with the three words of the y equation
  mv x14, x13             # Keep new x, as x11 is still needed as input
  c.jal ifs_xy            # x13 = new y
  mv x12, x13             # Only now replace the current position
  mv x11, x14

pixel: # Output the new position, x14 = x and x13 = y, as one dot.
  srai x14, x14, 5        # Scale both coordinates down by 32
  srai x13, x13, 5

  addi x15, x14, 0x7FF    # Center x and move it to bit 16 and above
  slli x15, x15, 16

  addi x13, x13, 0x7FF    # Center y and merge it into the low part
  add  x15, x15, x13

  sw x15, DACC_R12DH-0x400(x8) # A single store, so both channels change at the same moment
  j mainloop

# -----------------------------------------------------------------------------
ifs_xy: # Evaluate one IFS equation, whichever x10 selects:
        # x' = a*x + b*y  + e
        # y' = c*x + d*y  + f
        #
        # In:       x10 = address of the three words e, a, b or f, c, d
        #           x11 = x, x12 = y
        # Out:      x13 = result
        # Clobbers: x15
# -----------------------------------------------------------------------------

  lw x15, offset_a_c(x10) # Factor for x
  lw x13, offset_e_f(x10) # Constant term, starts the sum

  mul x15, x11, x15
  srai x15, x15, 15       # Fixpoint: products are scaled down by 2^15
  add x13, x13, x15

  lw x15, offset_b_d(x10) # Factor for y
  mul x15, x12, x15
  srai x15, x15, 15       # Fixpoint, same scaling
  add x13, x13, x15

  ret

# -----------------------------------------------------------------------------
xorshift: # Pseudo random number generator
          #
          # In/Out:   x9 = generator state, replaced by the next random number
          # Clobbers: x15
          #
          # Three shift and xor steps with the shift amounts 13, 17 and 5.
          # A state of zero would stay zero, so the seed has to be nonzero.
# -----------------------------------------------------------------------------

  slli x15, x9, 13  # state ^= state << 13
  xor  x9, x9, x15

  srli x15, x9, 17  # state ^= state >> 17, logical shift
  xor  x9, x9, x15

  slli x15, x9, 5   # state ^= state << 5
  xor  x9, x9, x15

  ret

#  raseed ^= (raseed << 13);
#  raseed ^= (raseed >> 17);
#  raseed ^= (raseed << 5);

# -----------------------------------------------------------------------------
signature: .ascii "Mecrisp."   # Eight plain bytes, no terminating zero
# -----------------------------------------------------------------------------
